Packages (install them first if not already installed)

library(tidyverse)
library(tidytext)
library(ggrepel)
library(plotly)

Data read-in

# Read the raw survey responses. The file is read without a header row
# (col_names = FALSE), so the columns get the default names X1, X2, X3 and the
# first line of the file (the question text) is kept as an ordinary data row.
words <- read_csv("data/sentiment-data.csv", col_names = FALSE)
words
## # A tibble: 127 × 3
##    X1                                                                X2    X3   
##    <chr>                                                             <chr> <chr>
##  1 Write down four words to describe how you're feeling about this … Writ… Writ…
##  2 Excited, Hopeful, Prepared, good                                  Ready Info…
##  3 Unsure, confused, anxious, curious                                apat… Exci…
##  4 Co operations, Teamwork, communication, critical thinking         Team… I wi…
##  5 a                                                                 a     a    
##  6 First, team work, nervous, curious                                Nerv… New  
##  7 Interesting. New. Exciting. Develop                               Inte… Exci…
##  8 perplexed,anxious,embarrassed,bit excited                      hope… resp…
##  9 Novel, Unknown, Challenging, Useful                               Nerv… accu…
## 10 Worried, excited, self-doubt, motivated                           Nerv… Hope…
## # … with 117 more rows

Data Cleaning

# Clean the free-text answers to the first question (column X1) into one
# word/short phrase per row. The steps below are tuned to this particular
# data set; the order of the steps matters.
col1_words <- words %>% 
  # Drop the first row, which holds the question text rather than an answer.
  slice(-1) %>% 
  select(X1) %>% 
  # Split each answer on commas, the ideographic comma, asterisks and
  # ". " (full stop + space). The printed result shows the tokens lower-cased
  # (unnest_tokens' default).
  # NOTE(review): the last alternative `,` duplicates the first as written --
  # possibly a full-width comma that was lost in encoding; TODO confirm.
  unnest_tokens(output = word, input = X1, token = str_split, pattern = ',|、|\\*|\\. |,') %>% 
  mutate(word = str_trim(word)) %>% 
  # Keep only tokens with fewer than 4 spaces and more than 2 characters.
  filter(str_count(word, '\\ ') < 4 & str_length(word) > 2) %>%
  # Tokens with exactly 3 spaces are split on spaces into individual words;
  # ifelse() returns a list column here, which unnest() flattens to one row
  # per word.
  mutate(word = ifelse(str_count(word, '\\ ') == 3, str_split(word, '\\ '), word)) %>% 
  unnest(word) %>% 
  # Remove the first match of: any 8 characters followed by ")", a trailing
  # " to", a leading "be ", a leading "bit ", or any run of 18 characters.
  # NOTE(review): the fixed-width alternatives (.{8}\\) and .{18}) presumably
  # target specific long answers in this data set -- verify before reusing.
  mutate(word = str_remove(word, '.{8}\\)|\\ to$|^be\\ |^bit\\ |.{18}'),
         # Split "a/b" style answers into separate words.
         word = str_split(word, '\\/')) %>%
  unnest(word) %>% 
  # Drop entries left empty by the removals above.
  filter(str_length(word) > 0)

# Print the cleaned words.
col1_words
## # A tibble: 459 × 1
##    word         
##    <chr>        
##  1 excited      
##  2 hopeful      
##  3 prepared     
##  4 good         
##  5 unsure       
##  6 confused     
##  7 anxious      
##  8 curious      
##  9 co operations
## 10 teamwork     
## # … with 449 more rows

Top 20 most frequent words in question one, with a visualization

# Tally each cleaned word, ordered from most to least frequent, and keep the
# first 20 rows.
top_20 <- col1_words %>%
  count(word, sort = TRUE) %>%
  head(20)
top_20
## # A tibble: 20 × 2
##    word             n
##    <chr>        <int>
##  1 nervous         59
##  2 excited         55
##  3 curious         34
##  4 anxious         14
##  5 scared          12
##  6 worried         12
##  7 interested      11
##  8 interesting     11
##  9 hopeful         10
## 10 difficult        9
## 11 challenging      6
## 12 confused         6
## 13 unknown          6
## 14 apprehensive     5
## 15 exciting         5
## 16 happy            5
## 17 stressed         5
## 18 teamwork         5
## 19 motivated        4
## 20 uncertain        4
# Horizontal bar chart of the top-20 word counts. Bars are ordered by count,
# the y-axis text and ticks are hidden, and each word is instead printed as a
# label to the right of its own bar.
top_20 %>%
  # Order the factor levels by count so the bars are sorted.
  mutate(word = fct_reorder(word, n)) %>%
  ggplot(aes(n, word, fill = word)) +
  geom_col(show.legend = FALSE, width = 0.5) +
  scale_fill_viridis_d() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_text(angle = 0, vjust = 0.5, size = rel(1.2))
  ) +
  xlab("frequent") +
  # Fixed x range: wider than the largest count so the text labels fit.
  xlim(c(0, 70)) +
  # Negative hjust pushes each label past the end of its bar.
  geom_text(aes(label = word), hjust = -0.5)

Comparison of sentiment words

# Diverging bar chart of the most frequent sentiment-bearing words.
# Words are matched against the "bing" lexicon (the default of
# get_sentiments(), spelled out here), joined explicitly on `word` so the join
# key is visible and no "Joining with" message is emitted.
p <- col1_words %>%
  count(word) %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  # Keep the highest-count words within each sentiment (ties are kept, so a
  # group can hold more than 9 rows).
  group_by(sentiment) %>%
  slice_max(n, n = 9) %>%
  ungroup() %>%
  # Flip negative words to the left of zero.
  mutate(n = ifelse(sentiment == "negative", -n, n)) %>%
  mutate(word = fct_reorder(word, n)) %>%
  ggplot() +
  geom_col(aes(n, word, fill = sentiment)) +
  labs(x = "Contribution to sentiment")
# Render the static ggplot as an interactive plotly widget.
ggplotly(p)
# Word-cloud style plot, one panel per sentiment. Every label is anchored at
# the same point (0, 0) and ggrepel pushes the labels apart; text size encodes
# the word count. The lexicon ("bing", the get_sentiments() default) and the
# join key are spelled out so the join is explicit and silent.
col1_words %>%
  count(word) %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  ggplot(aes(0, 0)) +
  geom_text_repel(
    aes(label = word, size = n, colour = sentiment),
    # No pull back toward the anchor and no overlap limit, so all words are
    # drawn and spread out; segments and point padding are disabled.
    force_pull = 0, max.overlaps = Inf,
    segment.color = NA, point.padding = NA,
    # Fixed seed keeps the label layout reproducible.
    seed = 399, show.legend = FALSE
  ) +
  facet_grid(~ sentiment) +
  theme_bw() +
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  labs(x = "", y = "")